library(mosaic)
library(tidyverse)
library(lubridate)
library(DataComputing)
library(rvest)
library(broom)

Research Focus:

As COVID-19 spreads at an alarming rate, a pressing question emerges on a global scale: which characteristics of a country contribute to the spread of the coronavirus? The factors we will analyze are population density and proximity to the point of origin (China).

Data Access

Reading in the Data:

Data Source 1: COVID

# Load the raw COVID export, display it, then report how many rows it has.
# NOTE(review): the column names reported for this table suggest the file's
# first line is a title rather than the real header row -- confirm.
COVID <- read.csv("total-covid-cases-deaths-per-million.csv")
COVID
nrow(COVID)
[1] 9487
# Column names of the raw COVID table.
names(COVID)
  [1] "total.covid.cases.deaths.per.million" "X"                                   
  [3] "X.1"                                  "X.2"                                 
  [5] "X.3"                                  "X.4"                                 
  [7] "X.5"                                  "X.6"                                 
  [9] "X.7"                                  "X.8"                                 
 [11] "X.9"                                  "X.10"                                
 [13] "X.11"                                 "X.12"                                
 [15] "X.13"                                 "X.14"                                
 [17] "X.15"                                 "X.16"                                
 [19] "X.17"                                 "X.18"                                
 [21] "X.19"                                 "X.20"                                
 [23] "X.21"                                 "X.22"                                
 [25] "X.23"                                 "X.24"                                
 [27] "X.25"                                 "X.26"                                
 [29] "X.27"                                 "X.28"                                
 [31] "X.29"                                 "X.30"                                
 [33] "X.31"                                 "X.32"                                
 [35] "X.33"                                 "X.34"                                
 [37] "X.35"                                 "X.36"                                
 [39] "X.37"                                 "X.38"                                
 [41] "X.39"                                 "X.40"                                
 [43] "X.41"                                 "X.42"                                
 [45] "X.43"                                 "X.44"                                
 [47] "X.45"                                 "X.46"                                
 [49] "X.47"                                 "X.48"                                
 [51] "X.49"                                 "X.50"                                
 [53] "X.51"                                 "X.52"                                
 [55] "X.53"                                 "X.54"                                
 [57] "X.55"                                 "X.56"                                
 [59] "X.57"                                 "X.58"                                
 [61] "X.59"                                 "X.60"                                
 [63] "X.61"                                 "X.62"                                
 [65] "X.63"                                 "X.64"                                
 [67] "X.65"                                 "X.66"                                
 [69] "X.67"                                 "X.68"                                
 [71] "X.69"                                 "X.70"                                
 [73] "X.71"                                 "X.72"                                
 [75] "X.73"                                 "X.74"                                
 [77] "X.75"                                 "X.76"                                
 [79] "X.77"                                 "X.78"                                
 [81] "X.79"                                 "X.80"                                
 [83] "X.81"                                 "X.82"                                
 [85] "X.83"                                 "X.84"                                
 [87] "X.85"                                 "X.86"                                
 [89] "X.87"                                 "X.88"                                
 [91] "X.89"                                 "X.90"                                
 [93] "X.91"                                 "X.92"                                
 [95] "X.93"                                 "X.94"                                
 [97] "X.95"                                 "X.96"                                
 [99] "X.97"                                 "X.98"                                
[101] "X.99"                                 "X.100"                               
[103] "X.101"                                "X.102"                               
[105] "X.103"                                "X.104"                               
[107] "X.105"                                "X.106"                               
[109] "X.107"                                "X.108"                               
[111] "X.109"                                "X.110"                               
[113] "X.111"                                "X.112"                               
[115] "X.113"                                "X.114"                               
[117] "X.115"                                "X.116"                               
[119] "X.117"                                "X.118"                               
[121] "X.119"                                "X.120"                               
[123] "X.121"                                "X.122"                               
[125] "X.123"                                "X.124"                               
[127] "X.125"                                "X.126"                               
[129] "X.127"                                "X.128"                               
[131] "X.129"                                "X.130"                               
[133] "X.131"                                "X.132"                               
[135] "X.133"                                "X.134"                               
[137] "X.135"                                "X.136"                               
[139] "X.137"                                "X.138"                               
[141] "X.139"                                "X.140"                               
[143] "X.141"                                "X.142"                               
[145] "X.143"                                "X.144"                               
[147] "X.145"                                "X.146"                               
[149] "X.147"                                "X.148"                               
[151] "X.149"                                "X.150"                               
[153] "X.151"                                "X.152"                               
[155] "X.153"                                "X.154"                               
[157] "X.155"                                "X.156"                               
[159] "X.157"                                "X.158"                               
[161] "X.159"                                "X.160"                               
[163] "X.161"                                "X.162"                               
[165] "X.163"                                "X.164"                               
[167] "X.165"                                "X.166"                               
[169] "X.167"                                "X.168"                               
[171] "X.169"                                "X.170"                               
[173] "X.171"                                "X.172"                               
[175] "X.173"                                "X.174"                               
[177] "X.175"                                "X.176"                               
[179] "X.177"                                "X.178"                               
[181] "X.179"                                "X.180"                               
[183] "X.181"                                "X.182"                               
[185] "X.183"                                "X.184"                               
[187] "X.185"                                "X.186"                               
[189] "X.187"                                "X.188"                               
[191] "X.189"                                "X.190"                               
[193] "X.191"                                "X.192"                               
[195] "X.193"                                "X.194"                               
[197] "X.195"                                "X.196"                               
[199] "X.197"                                "X.198"                               
[201] "X.199"                                "X.200"                               
[203] "X.201"                                "X.202"                               
[205] "X.203"                                "X.204"                               
[207] "X.205"                                "X.206"                               
[209] "X.207"                                "X.208"                               
[211] "X.209"                                "X.210"                               
[213] "X.211"                                "X.212"                               
[215] "X.213"                                "X.214"                               
[217] "X.215"                                "X.216"                               
[219] "X.217"                                "X.218"                               
[221] "X.219"                                "X.220"                               
[223] "X.221"                                "X.222"                               
[225] "X.223"                                "X.224"                               
[227] "X.225"                                "X.226"                               
[229] "X.227"                                "X.228"                               
[231] "X.229"                                "X.230"                               
[233] "X.231"                                "X.232"                               
[235] "X.233"                                "X.234"                               
[237] "X.235"                                "X.236"                               
[239] "X.237"                                "X.238"                               
[241] "X.239"                                "X.240"                               
[243] "X.241"                                "X.242"                               
[245] "X.243"                                "X.244"                               
[247] "X.245"                                "X.246"                               
[249] "X.247"                                "X.248"                               
[251] "X.249"                                "X.250"                               
[253] "X.251"                                "X.252"                               
[255] "X.253"                                "X.254"                               
# First few rows of the raw COVID table.
head(COVID)

Data Source 2: CountryData

# CountryData is not created in this document; presumably it is supplied by
# one of the loaded packages (TODO confirm). Display it, then count its rows.
CountryData
nrow(CountryData)
[1] 256
# Column names of CountryData.
names(CountryData)
 [1] "country"           "area"              "pop"               "growth"            "birth"            
 [6] "death"             "migr"              "maternal"          "infant"            "life"             
[11] "fert"              "health"            "HIVrate"           "HIVpeople"         "HIVdeath"         
[16] "obesity"           "underweight"       "educ"              "unemploymentYouth" "GDP"              
[21] "GDPgrowth"         "GDPcapita"         "saving"            "indProd"           "labor"            
[26] "unemployment"      "family"            "tax"               "budget"            "debt"             
[31] "inflation"         "discount"          "lending"           "narrow"            "broad"            
[36] "credit"            "shares"            "balance"           "exports"           "imports"          
[41] "gold"              "externalDebt"      "homeStock"         "abroadStock"       "elecProd"         
[46] "elecCons"          "elecExp"           "elecImp"           "elecCap"           "elecFossil"       
[51] "elecNuc"           "elecHydro"         "elecRenew"         "oilProd"           "oilExp"           
[56] "oilImp"            "oilRes"            "petroProd"         "petroCons"         "petroExp"         
[61] "petroImp"          "gasProd"           "gasCons"           "gasExp"            "gasImp"           
[66] "gasRes"            "mainlines"         "cell"              "netHosts"          "netUsers"         
[71] "airports"          "railways"          "roadways"          "waterways"         "marine"           
[76] "military"         
# First few rows of CountryData.
head(CountryData)

Data Source 3: Continents

# Load the country/continent lookup table, display it, then count its rows.
Continents <- read.csv("countries and continents.csv")
Continents
nrow(Continents)
[1] 251
# Column names of the Continents table.
names(Continents)
 [1] "name"                             "official_name_en"                 "official_name_fr"                
 [4] "ISO3166.1.Alpha.2"                "ISO3166.1.Alpha.3"                "M49"                             
 [7] "ITU"                              "MARC"                             "WMO"                             
[10] "DS"                               "Dial"                             "FIFA"                            
[13] "FIPS"                             "GAUL"                             "IOC"                             
[16] "ISO4217.currency_alphabetic_code" "ISO4217.currency_country_name"    "ISO4217.currency_minor_unit"     
[19] "ISO4217.currency_name"            "ISO4217.currency_numeric_code"    "is_independent"                  
[22] "Capital"                          "Continent"                        "TLD"                             
[25] "Languages"                        "Geoname.ID"                       "EDGAR"                           
# First few rows of the Continents table.
head(Continents)

Data Wrangling of COVID Dataset

# Display the raw COVID table again before it is reshaped.
COVID

Since we are solely focused on the spread of COVID-19, we filter out the death counts.

# Tidy the raw COVID table down to three columns: country, Date and
# CasesPerMillion (the death-rate column is dropped).
TidyCOVID <- COVID %>%
  rename(
    country = total.covid.cases.deaths.per.million,
    Code = X,
    Date = X.1,
    CasesPerMillion = X.3
  ) %>%
  # Skip the first row of the table; presumably it holds the file's real
  # column headers rather than data (TODO confirm against the CSV).
  filter(row_number() > 1) %>%
  select(country, Date, CasesPerMillion) %>%
  mutate(
    country = as.character(country),
    Date = mdy(Date),
    # Convert through character so the actual rate is parsed. Calling
    # as.integer() directly returns level codes when the column is a factor
    # and truncates the rate when it is character. Blank entries become NA.
    CasesPerMillion = as.numeric(as.character(CasesPerMillion))
  )
TidyCOVID
# Keep only the country name, land area and population, and derive the
# population density (people per unit of area, rounded to two decimals).
RelevantCountryData <- CountryData %>%
  select(country, area, pop) %>%
  mutate(popdensity = round(pop / area, digits = 2))

# Attach the country attributes to every COVID observation; the join key is
# left implicit, so dplyr matches on the shared column(s).
MasterData <- TidyCOVID %>%
  left_join(RelevantCountryData)
Joining, by = "country"
# Drop the continent- and world-level aggregate rows so that only individual
# countries remain, then estimate absolute case counts from the per-million
# rate and the population.
MasterData <-
  MasterData %>%
  filter(
    !country %in% c(
      "Africa", "Asia", "Europe", "North America",
      "Oceania", "South America", "World"
    )
  ) %>%
  # Multiply by the unrounded population and round the result. Rounding
  # pop / 1e6 to a whole number first would give every country with fewer
  # than 500,000 people zero cases and distort other small countries.
  mutate(Cases = round(CasesPerMillion * pop / 1000000, digits = 0))

MasterData
# For each country, find the earliest date on which its case count is
# non-zero; this marks the beginning of the spread there.
FirstInstance <- MasterData %>%
  filter(Cases != 0) %>%
  group_by(country) %>%
  summarise(beginningofspread = min(Date))

FirstInstance
# Snapshot of every country on 2020-04-05 with the average number of cases
# per day since the first date it showed a non-zero case count.
DailySpread <-
  left_join(MasterData, FirstInstance) %>%
  # Compare against a real Date instead of relying on string coercion.
  filter(Date == as.Date("2020-04-05")) %>%
  mutate(
    dayselapsed = Date - beginningofspread,
    # A country whose first cases fall on the snapshot date has zero elapsed
    # days; report NA there instead of dividing by zero and getting Inf.
    dailyspread = Cases / na_if(as.numeric(dayselapsed), 0)
  ) %>%
  arrange(desc(dailyspread))
Joining, by = "country"
# Bring the snapshot columns back onto the full time series; rows with no
# match in DailySpread get NA in the added columns.
MasterData <- MasterData %>%
  left_join(DailySpread)
Joining, by = c("country", "Date", "CasesPerMillion", "area", "pop", "popdensity", "Cases")
# Scatter plot of average daily spread against population.
ggplot(MasterData, aes(x = pop, y = dailyspread)) +
  geom_point()

MasterData

# Total estimated cases across all countries on each date, plotted over time.
# TRUE is spelled out because the shorthand T can be reassigned.
MasterData %>%
  group_by(Date) %>%
  summarise(totalcases = sum(Cases, na.rm = TRUE)) %>%
  ggplot(aes(x = Date, y = totalcases)) +
  geom_point()

LS0tCnRpdGxlOiAiRmluYWwgUHJvamVjdCIKb3V0cHV0OiBodG1sX25vdGVib29rCmF1dGhvcnM6ICJFdmVseW4gTXVycmF5IGFuZCBKb3NlcGggUGV2bmVyIgotLS0KCmBgYHtyfQpsaWJyYXJ5KG1vc2FpYykKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkobHVicmlkYXRlKQpsaWJyYXJ5KERhdGFDb21wdXRpbmcpCmxpYnJhcnkocnZlc3QpCmxpYnJhcnkoYnJvb20pCmBgYAoKIyMgUmVzZWFyY2ggRm9jdXM6CgpBcyBDT1ZJRC0xOSBzcHJlYWRzIGF0IGFuIGFsYXJtaW5nIHJhdGUsIGEgcHJlc3NpbmcgcXVlc3Rpb24gYXQgYSBnbG9iYWwgc2NhbGUgZW1lcmdlcy0tIHdoYXQgZmFjdG9ycyBvZiBhIGNvdW50cnkgY29udHJpYnV0ZSB0byB0aGUgc3ByZWFkIG9mIENvcm9uYXZpcnVzLiBUaGUgZmFjdG9ycyB3aGljaCB3ZSB3aWxsIGFuYWx5emUgYXJlOiBwb3B1bGF0aW9uIGRlbnNpdHksIGFuZCBwcm94aW1pdHkgdG8gb3JpZ2luIHBvaW50IChDaGluYSkuCgoKIyMgRGF0YSBBY2Nlc3MKClJlYWRpbmcgaW4gdGhlIERhdGE6CgoKRGF0YSBTb3VyY2UgMTogQ09WSUQKYGBge3J9CkNPVklEIDwtIHJlYWQuY3N2KGZpbGUgPSAidG90YWwtY292aWQtY2FzZXMtZGVhdGhzLXBlci1taWxsaW9uLmNzdiIpCkNPVklECmBgYAoKYGBge3J9CkNPVklEICU+JQogIG5yb3coKQpgYGAKYGBge3J9CkNPVklEICU+JQogIG5hbWVzKCkKYGBgCmBgYHtyfQpDT1ZJRCAlPiUKICBoZWFkKCkKYGBgCgoKCkRhdGEgU291cmNlIDI6IENvdW50cnlEYXRhCmBgYHtyfQpDb3VudHJ5RGF0YQpgYGAKCmBgYHtyfQpDb3VudHJ5RGF0YSAlPiUKICBucm93KCkKYGBgCmBgYHtyfQpDb3VudHJ5RGF0YSAlPiUKICBuYW1lcygpCmBgYApgYGB7cn0KQ291bnRyeURhdGEgJT4lCiAgaGVhZCgpCmBgYAoKCgpEYXRhIFNvdXJjZSAzOiBDb250aW5lbnRzCmBgYHtyfQpDb250aW5lbnRzIDwtIHJlYWQuY3N2KGZpbGUgPSAiY291bnRyaWVzIGFuZCBjb250aW5lbnRzLmNzdiIpCkNvbnRpbmVudHMKYGBgCmBgYHtyfQpDb250aW5lbnRzICU+JQogIG5yb3coKQpgYGAKYGBge3J9CkNvbnRpbmVudHMgJT4lCiAgbmFtZXMoKQpgYGAKYGBge3J9CkNvbnRpbmVudHMgJT4lCiAgaGVhZCgpCmBgYAoKCgoKIyMjIERhdGEgV3JhbmdsaW5nIG9mIENPVklEIERhdGFzZXQKCmBgYHtyfQpDT1ZJRApgYGAKClNpbmNlIHdlIGFyZSBzb2xleSBmb2N1c2VkIG9uIHRoZSBzcHJlYWQgb2YgQ09WSUQtMTksIGZpbHRlciBvdXQgZGVhdGggY291bnQuCgpgYGB7cn0KVGlkeUNPVklEIDwtIENPVklEICU+JQogIHJlbmFtZShjb3VudHJ5ID0gdG90YWwuY292aWQuY2FzZXMuZGVhdGhzLnBlci5taWxsaW9uICkgJT4lCiAgcmVuYW1lKCBDb2RlID0gWCApICU+JQogIHJlbmFtZShEYXRlID0gWC4xICkgJT4lCiAgcmVuYW1lKENhc2VzUGVyTWlsbGlvbiA9IFguMykgJT4lCiAgZmlsdGVyKHJvd19udW1iZXIoKSA+IDEpICU+JQogIHN1YnNldChzZWxlY3QgPSBjKDEsMyw1KSkgJT4lCiAgbXV0YXRl
KCBjb3VudHJ5ID0gYXMuY2hhcmFjdGVyKGNvdW50cnkpICkgJT4lCiAgbXV0YXRlKERhdGUgPSBtZHkoRGF0ZSkpICU+JQogIG11dGF0ZShDYXNlc1Blck1pbGxpb24gPSBhcy5pbnRlZ2VyKENhc2VzUGVyTWlsbGlvbikgLSAxKQoKCmBgYAoKCmBgYHtyfQpUaWR5Q09WSUQKCmBgYAoKCgpgYGB7cn0KClJlbGV2YW50Q291bnRyeURhdGEgPC0KICBDb3VudHJ5RGF0YSAlPiUKICBzdWJzZXQoc2VsZWN0ID0gYygxLDIsMykpICU+JQogIG11dGF0ZShwb3BkZW5zaXR5ID0gcm91bmQocG9wL2FyZWEsIGRpZ2l0cyA9IDIpKQoKYGBgCgpgYGB7cn0KCk1hc3RlckRhdGEgPC0gbGVmdF9qb2luKFRpZHlDT1ZJRCwgUmVsZXZhbnRDb3VudHJ5RGF0YSkKCmBgYAoKCgpgYGB7cn0KCk1hc3RlckRhdGEgPC0KICBNYXN0ZXJEYXRhICU+JQogIGZpbHRlcihjb3VudHJ5ICE9ICJBZnJpY2EiLAogICAgICAgICBjb3VudHJ5ICE9ICJBc2lhIiwKICAgICAgICAgY291bnRyeSAhPSAiRXVyb3BlIiwKICAgICAgICAgY291bnRyeSAhPSAiTm9ydGggQW1lcmljYSIsCiAgICAgICAgIGNvdW50cnkgIT0gIk9jZWFuaWEiLAogICAgICAgICBjb3VudHJ5ICE9ICJTb3V0aCBBbWVyaWNhIiwKICAgICAgICAgY291bnRyeSAhPSAiV29ybGQiCiAgICAgICAgICkgJT4lCiAgbXV0YXRlKCJDYXNlcyIgPSAoQ2FzZXNQZXJNaWxsaW9uICogcm91bmQocG9wLzEwMDAwMDAsIGRpZ2l0cyA9IDApKSkKICAKICAKICBNYXN0ZXJEYXRhCmBgYAoKCgpgYGB7cn0KCkZpcnN0SW5zdGFuY2UgPC0KICBNYXN0ZXJEYXRhICU+JQogIGZpbHRlcihDYXNlcyAhPSAwKSAlPiUKICBncm91cF9ieShjb3VudHJ5KSAlPiUKICBzdW1tYXJpc2UoYmVnaW5uaW5nb2ZzcHJlYWQgPSBtaW4oRGF0ZSkpCiAgCkZpcnN0SW5zdGFuY2UKCgpgYGAKCgoKCgoKYGBge3J9CgpEYWlseVNwcmVhZCA8LQogIGxlZnRfam9pbihNYXN0ZXJEYXRhLCBGaXJzdEluc3RhbmNlKSAlPiUKICBmaWx0ZXIoRGF0ZSA9PSAiMjAyMC0wNC0wNSIpICU+JQogIG11dGF0ZShkYXlzZWxhcHNlZCA9IERhdGUgLSBiZWdpbm5pbmdvZnNwcmVhZCkgJT4lCiAgbXV0YXRlKGRhaWx5c3ByZWFkID0gQ2FzZXMgLyBhcy5udW1lcmljKGRheXNlbGFwc2VkKSApICU+JQogIGFycmFuZ2UoZGVzYyhkYWlseXNwcmVhZCkpCgpgYGAKCgoKYGBge3J9CgpNYXN0ZXJEYXRhIDwtCiAgbGVmdF9qb2luKE1hc3RlckRhdGEsIERhaWx5U3ByZWFkKQoKYGBgCgpgYGB7cn0KCgpnZ3Bsb3QoZGF0YT1NYXN0ZXJEYXRhLGFlcyh4PXBvcCx5PWRhaWx5c3ByZWFkKSkrZ2VvbV9wb2ludCgpIAoKCgpgYGAKCgoKCmBgYHtyfQpNYXN0ZXJEYXRhCgpgYGAKCgpgYGB7cn0KCk1hc3RlckRhdGEgJT4lCiAgZ3JvdXBfYnkoRGF0ZSkgJT4lCiAgc3VtbWFyaXNlKHRvdGFsY2FzZXMgPSBzdW0oQ2FzZXMsIG5hLnJtPVQpKSAlPiUKICBnZ3Bsb3QoYWVzKHg9RGF0ZSx5PXRvdGFsY2FzZXMpKStnZW9tX3BvaW50KCkgCgpgYGA=